Here we show the code to reproduce the analyses of: Risso and Pagnotta (2020). Per-sample standardization and asymmetric winsorization lead to accurate classification of RNA-seq expression profiles. In preparation.

This file belongs to the repository: https://github.com/drisso/awst_analysis.

The code is released with license GPL v3.0.

Install and load awst

# BiocManager is the installer used below; fetch it from CRAN only when it is
# not already available in the library.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# Install the awst package from its GitHub repository (drisso/awst).
BiocManager::install("drisso/awst")

The synthetic dataset

… We then created \(k=5\) groups of samples, each made of \(M=30\) replicate samples, randomly selected (with replacement) from the original set of 80. For each group, we randomly selected (without replacement) \(J=500\) genes, whose expression was altered according to the following multiplicative model: \[ \tilde{y}_j = y_j \cdot (0.001 + r_j), \quad j=1,\ldots,J, \] where \(y_j\) denotes the observed expression of gene \(j\), \(\tilde{y}_j\) denotes the perturbed expression, and \(r_j\) is the realization of a Gamma random variable with shape parameter \(a=0.5\) and scale parameter \(s=1\).

# Build the synthetic data set: five groups ("A".."E") of M replicate samples
# are drawn with replacement from the "A" samples of the SEQC gene-level
# table; in each group a distinct set of genes is perturbed by a random
# multiplicative factor. The result is saved to "synthetic20200413.RData".
#
# NOTE: run this in a fresh R session. The workspace is deliberately not
# cleared here (no rm(list = ls())), so sourcing the script never deletes the
# caller's objects.
#
# The order of the random draws below (sample() / rgamma()) determines the
# generated data under the fixed seed: do not reorder them.

# BiocManager::install("seqc") # uncomment if necessary
set.seed(20200413)
library(seqc)

# ---- Load and filter the gene-level table ----
ddata <- get("ILM_aceview_gene_MAY")
ddata <- ddata[!is.na(ddata$EntrezID), ]  # keep genes with an Entrez ID
ddata <- ddata[!ddata$IsERCC, ]           # drop rows flagged as ERCC

# The first three columns are kept as feature annotation; the first four are
# removed from the expression matrix.
feature_annotation <- data.frame(ddata[, 1:3], row.names = ddata$EntrezID)
ddata <- as.matrix(ddata[, -c(1:4)])
dim(ddata)                                   #[1] 19701   384
sum(duplicated(feature_annotation$Symbol))   #[1] 0
sum(duplicated(feature_annotation$EntrezID)) #[1] 0
row.names(ddata) <- feature_annotation$EntrezID

# ---- Sample annotation parsed from the column names ----
# NOTE(review): the substr() positions assume a fixed column-name layout
# (sample in chars 1-3, lane in 6-7, flow cell at 17) -- confirm against seqc.
sample_annotation.df <- data.frame(
  row.names = colnames(ddata),
  sample = colnames(ddata)
)
dim(sample_annotation.df) #[1] 384   1
sample_annotation.df$lane <- substr(sample_annotation.df$sample, 6, 7)
sample_annotation.df$flow_cell <- substr(sample_annotation.df$sample, 17, 17)
sample_annotation.df$sample <- substr(sample_annotation.df$sample, 1, 3)

# Keep only the samples whose label contains "A".
sample_annotation.df <-
  sample_annotation.df[grep("A", sample_annotation.df$sample), ]
ddata <- ddata[, rownames(sample_annotation.df)]
dim(ddata) #[1] 19701    80

# ---- Design: which original sample backs each synthetic replicate ----
M <- 30
group_labels <- c("A", "B", "C", "D", "E")
# Zero-padded replicate ids ("01", "02", ...); valid for any M >= 1.
replicate_ids <- sprintf("%02d", seq_len(M))
sample_ids <- paste0(rep(group_labels, each = M), replicate_ids)

design.df <- data.frame(
  row.names = sample_ids,
  sample = sample_ids,
  original.sample = NA
)
# One sample() call per group, in group order, to keep the RNG stream intact.
for (g in seq_along(group_labels)) {
  group_rows <- ((g - 1) * M + 1):(g * M)
  design.df$original.sample[group_rows] <-
    sample(rownames(sample_annotation.df), M, replace = TRUE)
}
#  table(design.df$original.sample)

# ---- Perturb a distinct gene set in every group ----
no_of_altered_genes <- 500
the_genes <- rownames(ddata)  # genes still available for perturbation
group_data <- vector("list", length(group_labels))

for (g in seq_along(group_labels)) {
  k <- group_labels[g]
  # Anchored match: only sample ids that start with the group letter.
  wwhich <- grep(paste0("^", k), design.df$sample)
  tmp_data <- ddata[, design.df$original.sample[wwhich]]
  colnames(tmp_data) <- design.df$sample[wwhich]

  # Draw this group's genes and remove them from the pool, so that no gene
  # is perturbed in more than one group. Logical indexing (rather than
  # -which()) stays correct even when nothing matches.
  genes_to_alterate <- sample(the_genes, no_of_altered_genes, replace = FALSE)
  the_genes <- the_genes[!(the_genes %in% genes_to_alterate)]

  # Independent multiplicative factors 0.001 + Gamma(shape = 0.5, scale = 1)
  # for every replicate (column).
  for (jj in seq_len(M)) {
    tmp <- 0.001 + rgamma(length(genes_to_alterate), shape = 0.5, scale = 1)
    tmp_data[genes_to_alterate, jj] <- tmp_data[genes_to_alterate, jj] * tmp
  }

  group_data[[g]] <- tmp_data
}

# Bind the groups once instead of growing the matrix inside the loop.
synthetic_data <- do.call(cbind, group_data)
ddata <- floor(synthetic_data)  # back to integer-valued counts
dim(ddata) #[1] 19701   150

# ---- Annotation of the synthetic samples ----
annotation.df <- data.frame(
  samples = colnames(ddata),
  row.names = colnames(ddata)
)
annotation.df$sample <- substr(annotation.df$samples, 1, 1)  # group letter
annotation.df$sample.col <- factor(annotation.df$sample)
# One colour per group; the factor levels are replaced by the colour names.
clust.col <- c("gold", "red", "green2", "blue", "cyan")
levels(annotation.df$sample.col) <- clust.col
names(clust.col) <- unique(annotation.df$sample)

save(ddata, annotation.df, feature_annotation, clust.col,
     file = "synthetic20200413.RData")

#tmp <- cbind(annotation.df, t(ddata))
#write.table(tmp, file = "synthetic20200413.tsv", sep = "\t", quote = FALSE, row.names = FALSE)
#write.table(feature_annotation, file = "synthetic20200413_features_annotation.tsv", sep = "\t", quote = FALSE, row.names = FALSE)

Figure 2

Performance of different data pre-processing paired with the hierarchical clustering (Euclidean distance, Ward’s linkage). a) AWST data pre-processing; b) Hart’s data pre-processing; c) Radovich’s data pre-processing; d) TCGA data pre-processing. The “sample” bar indicates the true partition, and the “clust” bar indicates the inferred partition obtained by cutting the tree to get 5 clusters. The Calinski-Harabasz curve is superimposed in each panel.

Supplementary Figure 1

(Extended Figure 2) Performance of different data pre-processing paired with the hierarchical clustering (Euclidean distance, Ward’s linkage). a) AWST data pre-processing; b) Hart’s data pre-processing; c) Radovich’s data pre-processing; d) TCGA data pre-processing; e) FPKM pre-processing with top 2,500 features according to standard-deviation; f) FPKM pre-processing with top 5,000 features according to standard-deviation; g) rlog pre-processing; h) VST pre-processing; i) Townes’ transformation (null residuals with deviance); l) Townes’ transformation (null residuals with pearson). The “sample” bar indicates the true partition, and the “clust” bar indicates the inferred partition obtained by cutting the tree to get 5 clusters. The Calinski-Harabasz curve is superimposed in each panel.

Supplementary Figure 2

Study, for the synthetic data, of the effects of data pre-processing on the compactness of the groups with respect to the theoretical partition and the Euclidean distance. a) AWST; b) Hart; c) Radovich; d) TCGA; e) FPKM pre-processing with top 2500 features according to standard-deviation; f) FPKM pre-processing with top 5000 features according to standard-deviation; g) rlog pre-processing; h) VST pre-processing; i) Townes’ transformation (null residuals with deviance); l) Townes’ transformation (null residuals with pearson).

Supplementary Figure 3

Performance of different data pre-processing paired with ConsensusClusterPlus (inner and outer average linkage with Pearson’s correlation as distance matrix). a) AWST data pre-processing; b) Hart’s data pre-processing; c) Radovich’s protocol; d) TCGA data protocol; e) FPKM pre-processing with top 2,500 features according to standard-deviation; f) FPKM pre-processing with top 5,000 features according to standard-deviation; g) rlog pre-processing; h) VST pre-processing; i) Townes’ transformation (null residuals with deviance); l) Townes’ transformation (null residuals with pearson). The “sample” bar indicates the true partition, and the “consensus” bar indicates the inferred partition obtained by requiring 5 clusters.

Supplementary Figure 4

Performance of different data pre-processing paired with ConsensusClusterPlus (Euclidean distance and PAM method). a) AWST data pre-processing; b) Hart’s data pre-processing; c) Radovich’s pre-processing; d) TCGA data pre-processing; e) FPKM pre-processing with top 2,500 features according to standard-deviation; f) FPKM pre-processing with top 5,000 features according to standard-deviation; g) rlog pre-processing; h) VST pre-processing; i) Townes’ transformation (null residuals with deviance); l) Townes’ transformation (null residuals with pearson). The “sample” bar indicates the true partition, and the “consensus” bar indicates the inferred partition obtained by requiring 5 clusters.

AWST procedure

hclust (Euclidean/Ward)

##    user  system elapsed 
##   0.919   0.007   0.928

## null device 
##           1
## null device 
##           1
## cluster accuracy (eca): 0.9917
## cluster purity (ecp): 0.9914
## adjusted Rand's index (ari): 0.9832
## G index (geometric average of eca, ecp, and ari): 0.9887
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (default parameters)

Result of ConsensusClusterPlus with default setup
(innerLinkage=“average”, finalLinkage=“average”, distance=“pearson”)

## null device 
##           1
## cluster accuracy (eca): 1
## cluster purity (ecp): 1
## adjusted Rand's index (ari): 1
## G index (geometric average of eca, ecp, and ari): 1
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (PAM)

Result of ConsensusClusterPlus with clusterAlg = “pam” and distance = “euclidean” (other parameters left to default)

## null device 
##           1
## cluster accuracy (eca): 1
## cluster purity (ecp): 1
## adjusted Rand's index (ari): 1
## G index (geometric average of eca, ecp, and ari): 1
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

Hart 2013

Finding the active genes in deep RNA-seq gene expression studies

hclust (Euclidean/Ward)

##    user  system elapsed 
##   0.609   0.000   0.608

## null device 
##           1
## null device 
##           1
## cluster accuracy (eca): 0.9122
## cluster purity (ecp): 0.8829
## adjusted Rand's index (ari): 0.6779
## G index (geometric average of eca, ecp, and ari): 0.8173
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (default parameters)

Result of ConsensusClusterPlus with default setup
(innerLinkage=“average”, finalLinkage=“average”, distance=“pearson”)

## null device 
##           1
## cluster accuracy (eca): 0.7377
## cluster purity (ecp): 0.9315
## adjusted Rand's index (ari): 0.6924
## G index (geometric average of eca, ecp, and ari): 0.7806
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (PAM)

Result of ConsensusClusterPlus with clusterAlg = “pam” and distance = “euclidean” (other parameters left to default)

## null device 
##           1
## cluster accuracy (eca): 1
## cluster purity (ecp): 1
## adjusted Rand's index (ari): 1
## G index (geometric average of eca, ecp, and ari): 1
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

Radovich (2018) procedure

The Integrated Genomic Landscape of Thymic Epithelial Tumors

hclust (Euclidean/Ward)

##    user  system elapsed 
##   1.227   0.000   1.227

## null device 
##           1
## cluster accuracy (eca): 0.6775
## cluster purity (ecp): 0.6563
## adjusted Rand's index (ari): 0.0848
## G index (geometric average of eca, ecp, and ari): 0.3354
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (default parameters)

Result of ConsensusClusterPlus with default setup
(innerLinkage=“average”, finalLinkage=“average”, distance=“pearson”)

## cluster accuracy (eca): 1
## cluster purity (ecp): 1
## adjusted Rand's index (ari): 1
## G index (geometric average of eca, ecp, and ari): 1
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (PAM)

Result of ConsensusClusterPlus with clusterAlg = “pam” and distance = “euclidean” (other parameters left to default)

## cluster accuracy (eca): 0.6201
## cluster purity (ecp): 0.6072
## adjusted Rand's index (ari): 0.0347
## G index (geometric average of eca, ecp, and ari): 0.2355
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

TCGA (2015) procedure

Comprehensive, Integrative Genomic Analysis of Diffuse Lower-Grade Gliomas - Supplementary Appendix (see pages 23-24)

hclust (Euclidean/Ward)

##    user  system elapsed 
##   3.701   0.044   3.750

## cluster accuracy (eca): 0.3872
## cluster purity (ecp): 0.8364
## adjusted Rand's index (ari): 0.0013
## G index (geometric average of eca, ecp, and ari): 0.0742
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (default parameters)

Result of ConsensusClusterPlus with default setup
(innerLinkage=“average”, finalLinkage=“average”, distance=“pearson”)

## null device 
##           1
## cluster accuracy (eca): 0.5595
## cluster purity (ecp): 0.6445
## adjusted Rand's index (ari): 0.1102
## G index (geometric average of eca, ecp, and ari): 0.3412
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (PAM)

Result of ConsensusClusterPlus with clusterAlg = “pam” and distance = “euclidean” (other parameters left to default)

## null device 
##           1
## cluster accuracy (eca): 0.3872
## cluster purity (ecp): 0.8364
## adjusted Rand's index (ari): 0.0013
## G index (geometric average of eca, ecp, and ari): 0.0742
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

FPKM: Top 2500 features

##    user  system elapsed 
##   0.136   0.008   0.145

## null device 
##           1
## cluster accuracy (eca): 0.881
## cluster purity (ecp): 0.8299
## adjusted Rand's index (ari): 0.4637
## G index (geometric average of eca, ecp, and ari): 0.6973
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (default parameters)

Result of ConsensusClusterPlus with default setup
(innerLinkage=“average”, finalLinkage=“average”, distance=“pearson”)

## null device 
##           1
## cluster accuracy (eca): 0.7717
## cluster purity (ecp): 0.7711
## adjusted Rand's index (ari): 0.5204
## G index (geometric average of eca, ecp, and ari): 0.6765
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (PAM)

Result of ConsensusClusterPlus with clusterAlg = “pam” and distance = “euclidean” (other parameters left to default)

## null device 
##           1
## cluster accuracy (eca): 1
## cluster purity (ecp): 1
## adjusted Rand's index (ari): 1
## G index (geometric average of eca, ecp, and ari): 1
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

FPKM: Top 5000 features

## null device 
##           1
## cluster accuracy (eca): 0.3872
## cluster purity (ecp): 0.8364
## adjusted Rand's index (ari): 0.0013
## G index (geometric average of eca, ecp, and ari): 0.0742
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (default parameters)

Result of ConsensusClusterPlus with default setup
(innerLinkage=“average”, finalLinkage=“average”, distance=“pearson”)

## null device 
##           1
## cluster accuracy (eca): 0.6104
## cluster purity (ecp): 0.6294
## adjusted Rand's index (ari): 0.1812
## G index (geometric average of eca, ecp, and ari): 0.4114
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (PAM)

Result of ConsensusClusterPlus with clusterAlg = “pam” and distance = “euclidean” (other parameters left to default)

## null device 
##           1
## cluster accuracy (eca): 0.7841
## cluster purity (ecp): 0.8917
## adjusted Rand's index (ari): 0.6188
## G index (geometric average of eca, ecp, and ari): 0.7564
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

VST (DESeq2): all features

## null device 
##           1
## cluster accuracy (eca): 1
## cluster purity (ecp): 1
## adjusted Rand's index (ari): 1
## G index (geometric average of eca, ecp, and ari): 1
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (default parameters)

Result of ConsensusClusterPlus with default setup
(innerLinkage=“average”, finalLinkage=“average”, distance=“pearson”)

## Loading required package: ConsensusClusterPlus
## end fraction
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered

## cluster accuracy (eca): 1
## cluster purity (ecp): 1
## adjusted Rand's index (ari): 1
## G index (geometric average of eca, ecp, and ari): 1
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (PAM)

Result of ConsensusClusterPlus with clusterAlg = “pam” and distance = “euclidean” (other parameters left to default)

## end fraction
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered

## null device 
##           1
## cluster accuracy (eca): 1
## cluster purity (ecp): 1
## adjusted Rand's index (ari): 1
## G index (geometric average of eca, ecp, and ari): 1
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

rLog (DESeq2): all features

## null device 
##           1
## cluster accuracy (eca): 0.9309
## cluster purity (ecp): 0.8527
## adjusted Rand's index (ari): 0.3833
## G index (geometric average of eca, ecp, and ari): 0.6726
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (default parameters)

Result of ConsensusClusterPlus with default setup
(innerLinkage=“average”, finalLinkage=“average”, distance=“pearson”)

## null device 
##           1
## cluster accuracy (eca): 0.8166
## cluster purity (ecp): 0.7942
## adjusted Rand's index (ari): 0.509
## G index (geometric average of eca, ecp, and ari): 0.6911
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (PAM)

Result of ConsensusClusterPlus with clusterAlg = “pam” and distance = “euclidean” (other parameters left to default)

## null device 
##           1
## cluster accuracy (eca): 0.9625
## cluster purity (ecp): 0.954
## adjusted Rand's index (ari): 0.8968
## G index (geometric average of eca, ecp, and ari): 0.9373
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

rLog (DESeq2) 2,500 HVG

## null device 
##           1
## null device 
##           1
## cluster accuracy (eca): 1
## cluster purity (ecp): 1
## adjusted Rand's index (ari): 1
## G index (geometric average of eca, ecp, and ari): 1
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (default parameters)

Result of ConsensusClusterPlus with default setup
(innerLinkage=“average”, finalLinkage=“average”, distance=“pearson”)

## null device 
##           1
## cluster accuracy (eca): 1
## cluster purity (ecp): 1
## adjusted Rand's index (ari): 1
## G index (geometric average of eca, ecp, and ari): 1
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (PAM)

Result of ConsensusClusterPlus with clusterAlg = “pam” and distance = “euclidean” (other parameters left to default)

## null device 
##           1
## cluster accuracy (eca): 1
## cluster purity (ecp): 1
## adjusted Rand's index (ari): 1
## G index (geometric average of eca, ecp, and ari): 1
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

Townes/deviance (scry)

## null device 
##           1
## null device 
##           1
## cluster accuracy (eca): 0.9865
## cluster purity (ecp): 0.9856
## adjusted Rand's index (ari): 0.9669
## G index (geometric average of eca, ecp, and ari): 0.9796
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (default parameters)

Result of ConsensusClusterPlus with default setup
(innerLinkage=“average”, finalLinkage=“average”, distance=“pearson”)

## null device 
##           1
## cluster accuracy (eca): 1
## cluster purity (ecp): 1
## adjusted Rand's index (ari): 1
## G index (geometric average of eca, ecp, and ari): 1
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (PAM)

## null device 
##           1
## cluster accuracy (eca): 0.9665
## cluster purity (ecp): 0.9627
## adjusted Rand's index (ari): 0.9166
## G index (geometric average of eca, ecp, and ari): 0.9483
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

Townes/deviance (scry) 2,500HVG

## cluster accuracy (eca): 0.9865
## cluster purity (ecp): 0.9856
## adjusted Rand's index (ari): 0.9669
## G index (geometric average of eca, ecp, and ari): 0.9796
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (default parameters)

Result of ConsensusClusterPlus with default setup
(innerLinkage=“average”, finalLinkage=“average”, distance=“pearson”)

## cluster accuracy (eca): 1
## cluster purity (ecp): 1
## adjusted Rand's index (ari): 1
## G index (geometric average of eca, ecp, and ari): 1
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (PAM)

## cluster accuracy (eca): 0.971
## cluster purity (ecp): 0.9685
## adjusted Rand's index (ari): 0.933
## G index (geometric average of eca, ecp, and ari): 0.9573
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

Townes/pearson (scry)

## null device 
##           1
## cluster accuracy (eca): 0.928
## cluster purity (ecp): 0.9102
## adjusted Rand's index (ari): 0.7555
## G index (geometric average of eca, ecp, and ari): 0.861
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (default parameters)

Result of ConsensusClusterPlus with default setup
(innerLinkage=“average”, finalLinkage=“average”, distance=“pearson”)

## null device 
##           1
## cluster accuracy (eca): 0.9264
## cluster purity (ecp): 0.9208
## adjusted Rand's index (ari): 0.8227
## G index (geometric average of eca, ecp, and ari): 0.8886
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (PAM)

## null device 
##           1
## cluster accuracy (eca): 0.9297
## cluster purity (ecp): 0.9172
## adjusted Rand's index (ari): 0.805
## G index (geometric average of eca, ecp, and ari): 0.8821
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

Townes/pearson (scry) 2,500HVG

## cluster accuracy (eca): 0.9689
## cluster purity (ecp): 0.9625
## adjusted Rand's index (ari): 0.9011
## G index (geometric average of eca, ecp, and ari): 0.9437
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (default parameters)

Result of ConsensusClusterPlus with default setup
(innerLinkage=“average”, finalLinkage=“average”, distance=“pearson”)

## null device 
##           1
## cluster accuracy (eca): 0.9826
## cluster purity (ecp): 0.9809
## adjusted Rand's index (ari): 0.9513
## G index (geometric average of eca, ecp, and ari): 0.9715
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

ConsensusClusterPlus (PAM)

## null device 
##           1
## cluster accuracy (eca): 0.9601
## cluster purity (ecp): 0.9534
## adjusted Rand's index (ari): 0.8845
## G index (geometric average of eca, ecp, and ari): 0.932
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5

Table of the experiments

what with ECA ECP ARI G ASW timing
AWST HC 0.9917 0.9914 0.9832 0.9887 0.2465 0.848
AWST CCP1 1 1 1 1 NA NA
AWST CCP2 1 1 1 1 NA NA
Hart HC 0.9122 0.8829 0.6779 0.8173 0.1832 0.58
Hart CCP1 0.7377 0.9315 0.6924 0.7806 NA NA
Hart CCP2 1 1 1 1 NA NA
Radovich HC 0.6775 0.6563 0.0848 0.3354 0.0449 1.135
Radovich CCP1 1 1 1 1 NA NA
Radovich CCP2 0.6201 0.6072 0.0347 0.2355 NA NA
TCGA HC 0.3872 0.8364 0.0013 0.0742 0.0345 3.335
TCGA CCP1 0.5595 0.6445 0.1102 0.3412 NA NA
TCGA CCP2 0.3872 0.8364 0.0013 0.0742 NA NA
FPKM (2,500HVG) HC 0.881 0.8299 0.4637 0.6973 0.178 0.134
FPKM (2,500HVG) CCP1 0.7717 0.7711 0.5204 0.6765 NA NA
FPKM (2,500HVG) CCP2 1 1 1 1 NA NA
FPKM (5,000HVG) HC 0.3872 0.8364 0.0013 0.0742 0.0864 NA
FPKM (5,000HVG) CCP1 0.6104 0.6294 0.1812 0.4114 NA NA
FPKM (5,000HVG) CCP2 0.7841 0.8917 0.6188 0.7564 NA NA
VST HC 1 1 1 1 0.2504 3.068
VST CCP1 1 1 1 1 NA NA
VST CCP2 1 1 1 1 NA NA
rLog HC 0.9309 0.8527 0.3833 0.6726 0.1728 927.896
rLog CCP1 0.8166 0.7942 0.509 0.6911 NA NA
rLog CCP2 0.9625 0.954 0.8968 0.9373 NA NA
rLog (2,500HVG) HC 1 1 1 1 0.2533 NA
rLog (2,500HVG) CCP1 1 1 1 1 NA NA
rLog (2,500HVG) CCP2 1 1 1 1 NA NA
Townes/deviance HC 0.9865 0.9856 0.9669 0.9796 0.158 0.216
Townes/deviance CCP1 1 1 1 1 NA NA
Townes/deviance CCP2 0.9665 0.9627 0.9166 0.9483 NA NA
Townes/deviance (2,500HVG) HC 0.9865 0.9856 0.9669 0.9796 0.191 NA
Townes/deviance (2,500HVG) CCP1 1 1 1 1 NA NA
Townes/deviance (2,500HVG) CCP2 0.971 0.9685 0.933 0.9573 NA NA
Townes/pearson HC 0.928 0.9102 0.7555 0.861 0.085 0.048
Townes/pearson CCP1 0.9264 0.9208 0.8227 0.8886 NA NA
Townes/pearson CCP2 0.9297 0.9172 0.805 0.8821 NA NA
Townes/pearson (2,500HVG) HC 0.9689 0.9625 0.9011 0.9437 0.1099 NA
Townes/pearson (2,500HVG) CCP1 0.9826 0.9809 0.9513 0.9715 NA NA
Townes/pearson (2,500HVG) CCP2 0.9601 0.9534 0.8845 0.932 NA NA

HC) hierarchical clustering with Ward’s linkage and Euclidean distance; CCP1) ConsensusClusterPlus with average inner and outer linkage, and Pearson’s correlation as distance; CCP2) ConsensusClusterPlus with PAM and Euclidean distance.

Average Silhouette Width

## null device 
##           1